/***
This do-file creates 2 figures: (1) employment in Tracker vs CES vs CPS and (2)
Accommodation and Food Services and Professional Services Employment in Tracker
vs CES.
***/

*-------------------------------------------------------------------------------
* Configuration: project root, shared globals, output folders
*-------------------------------------------------------------------------------

* Set $root via -project-. When r(buildrunning) is 0 the interactive
* configuration file is included as well (presumably because no build is
* running; confirm against the -project- documentation).
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Register the globals file as a dependency, then run it in this scope
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

* Sub-folder of "paper numbers" that receives this file's YAML outputs
local category "Employment"
local numbers_root "${root}/results/paper numbers"

* Create the output folders; -capture- swallows the error raised when a
* folder already exists
foreach folder in "${root}/results/Employment" "`numbers_root'" "`numbers_root'/`category'" {
	capture mkdir "`folder'"
}

* Remove YAML files left by an earlier run; -capture- swallows the error
* raised when a file is not there
foreach stem in "Changes in Employment Rates Over Time Pooling All Industries" ///
	"Changes in Employment Rates Over Time Accommodation and Food Services vs Professional Services" {
	capture erase "`numbers_root'/`category'/`stem'.yaml"
}

*-------------------------------------------------------------------------------
**# 1 - Aggregate Employment Time Series vs CES and CPS
*-------------------------------------------------------------------------------

*-------------------------------------------------------------------------------
* Tracker series (national, weekly file)
*-------------------------------------------------------------------------------

* Register the input with -project- and read it
local tracker_csv "${root}/data/web/data/Employment - National - Weekly.csv"
project, uses("`tracker_csv'")
import delimited "`tracker_csv'", clear

* Shorten the end-of-week day column to "day" and build a daily date from it
rename day_endofweek day
generate date = mdy(month, day, year)
format %td date

* Scale emp by 100 (percentage terms) and drop every other column
replace emp = 100 * emp
keep year month day date emp

* Park the result for the merge further down
tempfile tracker_employment
save `tracker_employment'

*-------------------------------------------------------------------------------
* CES series CEU0500000001, expressed relative to January 2020
*-------------------------------------------------------------------------------

* Register the input with -project- and read it
local ces_csv "${root}/data/dvc/St Louis Fed/private_nonfarm_emp_CEU0500000001.csv"
project, uses("`ces_csv'")
import delimited "`ces_csv'", clear

* The imported date is a string: characters 1-4 are read as the year and
* characters 6-7 as the month
rename ceu0500000001 employment_ces
generate year = real(substr(date, 1, 4))
generate month = real(substr(date, 6, 2))
assert !mi(year, month, employment_ces)

* Restrict to 2020-2022 and date every monthly value on the 15th
keep if inrange(year, 2020, 2022)
keep year month employment_ces
generate date = mdy(month, 15, year)

* Copy the January 2020 level onto every row to serve as the base
generate jan2020_level = employment_ces if date == mdy(1, 15, 2020)
gegen base = mean(jan2020_level)
drop jan2020_level

* Percent difference from the January 2020 level
generate norm_emp_ces = 100 * (employment_ces / base - 1)

* Replace each date by the Friday of its Sunday-to-Saturday week
* (dow() is 0 for Sunday and 5 for Friday)
generate friday = date - dow(date) + 5
format %td friday
drop date
rename friday date

tempfile ces_national
save `ces_national'

*-------------------------------------------------------------------------------
* CPS series (count of job holders, per the section's original heading)
*-------------------------------------------------------------------------------

* Register the input with -project- and read it
local cps_dta "${root}/data/derived/CPS/CPS National Monthly.dta"
project, uses("`cps_dta'")
use "`cps_dta'", clear

* Only the date and the normalised CPS series are needed
keep date norm_emp_cps

* Replace each date by the Friday of its Sunday-to-Saturday week
* (dow() is 0 for Sunday and 5 for Friday)
generate friday = date - dow(date) + 5
format %td friday
drop date
rename friday date

* Park the result for the merge further down
tempfile cps
save `cps'

*-------------------------------------------------------------------------------
* Combine the three series and take first differences
*-------------------------------------------------------------------------------

* Tracker rows are the master; CES and CPS are attached by date
use `tracker_employment', clear
foreach other in ces_national cps {
	merge 1:1 date using ``other'', nogenerate
}

* Drop rows past the cutoff date
drop if date > ${finaldate}

* Drop rows on which any of the three series is missing
drop if emp == . | norm_emp_ces == . | norm_emp_cps == .

* Change from the previous remaining row; the first row has no predecessor
* and is set to 0
sort date
foreach pair in "emp emp" "ces norm_emp_ces" "cps norm_emp_cps" {
	local tag : word 1 of `pair'
	local level : word 2 of `pair'
	generate change_`tag' = cond(_n == 1, 0, `level' - `level'[_n-1])
}

*-------------------------------------------------------------------------------
* RMSE of tracker changes against CES and CPS changes
*-------------------------------------------------------------------------------

* For each benchmark: gap between the change series, its square, the mean of
* the squares, and the square root of that mean formatted to two decimals.
* Leaves locals ces_mse, ces_rmse, cps_mse, cps_rmse for later use.
foreach src in ces cps {
	generate `src'_dif = change_emp - change_`src' if !mi(change_`src')
	generate `src'_dif_sq = `src'_dif ^ 2
	summarize `src'_dif_sq
	local `src'_mse = `r(mean)'
	local `src'_rmse : display %4.2f sqrt(``src'_mse')
	display ``src'_rmse'
}

*-------------------------------------------------------------------------------
* Correlation of tracker changes with CES and CPS changes
*-------------------------------------------------------------------------------

* Leaves locals corr_ces and corr_cps, formatted to two decimals
foreach src in ces cps {
	correlate change_emp change_`src'
	local corr_`src' : display %4.2f `r(rho)'
}

*-------------------------------------------------------------------------------
* Plot
*-------------------------------------------------------------------------------

* Position of the dashed vertical reference line. 17 Apr 2020 is the Friday of
* the week containing 15 Apr 2020 (mid-month dates were moved to Fridays when
* the CES and CPS series were prepared), which is why the label next to the
* line reads "April 15 2020".
local apr = td(17apr2020)

* Three connected series of changes: tracker, CES and CPS (dashed).
* - x-axis ticks sit on the 15th of every other month, Jan 2020 to Nov 2021.
* - ${title_`version'} injects title options from a global; the local
*   `version' is not defined in this file (presumably set in an included
*   file - confirm).
* - The two text() boxes near the bottom show the RMSE and correlation locals
*   computed above, placed 200 and 70 days before ${finaldate}.
tw ///
	(connected change_emp date, sort color(oi2)) ///
	(connected change_ces date if change_ces != ., sort color(oi3)) ///
	(connected change_cps date if change_cps != ., sort color(oi1) lpattern(dash)) ///
	, ///
	xlab(`=mdy(1, 15, 2020)' `""Jan" "2020""'  `=mdy(3, 15, 2020)' "Mar" ///
	`=mdy(5, 15, 2020)' "May" ///
	`=mdy(7, 15, 2020)' "Jul" `=mdy(9, 15, 2020)' "Sep" ///
	`=mdy(11, 15, 2020)' "Nov" ///
	`=mdy(1, 15, 2021)' `""Jan" "2021""' `=mdy(3, 15, 2021)' "Mar" ///
	`=mdy(5, 15, 2021)' "May" ///
	`=mdy(7, 15, 2021)' "Jul" `=mdy(9, 15, 2021)' "Sep" `=mdy(11, 15, 2021)' "Nov"  ///
	,format(%tdm) labsize(small)) xtitle(" ") ///
	${title_`version'} ///
	legend(order(1 "Tracker Employment Series"  2 "CES" 3 "CPS") size(small) col(3) ring(1) pos(6) symxsize(9)) ///
	ylabel(-20 "-20 p.p." -10 "-10 p.p." 0 "0 p.p." 10 "+10 p.p.", nogrid) ///
	ytitle("Month-to-Month Change" "in Employment (p.p.)") ///
	xline(`apr', lpattern(dash) lcolor(gs6)) ///
	text(9 `=`apr'+6' "April 15 2020", place(3) color(gs6) size(small)) ///
	text(-18.5 `=${finaldate}-200' "RMSE CES: `ces_rmse' p.p." "Corr CES: `corr_ces'", color(gs6) size(small) justification(left)) ///
	text(-18.5 `=${finaldate}-70' "RMSE CPS: `cps_rmse' p.p." "Corr CPS: `corr_cps'", color(gs6) size(small) justification(left)) ///
	xsize(11)

* Export through the project's graph-export wrapper in the configured format
oi_graph_export "${root}/results/Employment/Employment in Tracker vs CES vs CPS - changes - long", type(${fig_type})

*-------------------------------------------------------------------------------
* Write the pooled RMSEs and correlations to the paper-numbers YAML file
*-------------------------------------------------------------------------------

local pooled_yaml "${root}/results/paper numbers/`category'/Changes in Employment Rates Over Time Pooling All Industries.yaml"

* Entries are written in this order: RMSE vs CES, RMSE vs CPS,
* correlation vs CES, correlation vs CPS
foreach src in ces cps {
	local label = upper("`src'")
	yamlout using "`pooled_yaml'", ///
		key("emp_`src'_rmse") ///
		comment("RMSE Tracker Employment vs `label'") ///
		value(``src'_rmse') fmt(%9.2f)
}

foreach src in ces cps {
	local label = upper("`src'")
	yamlout using "`pooled_yaml'", ///
		key("emp_`src'_corr") ///
		comment("Correlation Tracker Employment vs `label'") ///
		value(`corr_`src'') fmt(%9.2f)
}

* Register the YAML file as an output of this do-file
project, creates("`pooled_yaml'")

*-------------------------------------------------------------------------------
**# 2 - Employment in Accommodation and Food Services and Professional Services
*-------------------------------------------------------------------------------

*-------------------------------------------------------------------------------
* Prepare tracker data
*-------------------------------------------------------------------------------

* Both sector series live in the same national weekly file, so it is read
* once: emp_ss60 (Professional and Business Services) and emp_s72 (Food and
* Accommodation Services)
project, uses("${root}/data/web/data/Employment - National - Weekly.csv")
import delimited "${root}/data/web/data/Employment - National - Weekly.csv", clear

* Daily date built from the end-of-week day
gen date = mdy(month, day_endofweek, year)

* Express both series in percentage terms
foreach v of varlist emp_s72 emp_ss60 {
	replace `v' = `v' * 100
}

keep date emp_s72 emp_ss60
order date emp_s72 emp_ss60

* The 1:1 merges on date further down need date to identify rows uniquely;
* fail here, close to the source, if it does not. missok: a single missing
* date is tolerated, exactly as a 1:1 merge would tolerate it.
isid date, missok
sort date

tempfile employment_sector
save `employment_sector'

*-------------------------------------------------------------------------------
* CES sector series, each expressed relative to January 2020
*-------------------------------------------------------------------------------

* One pass per sector. Each spec is "<sector code> <file stem> <series id>".
* A pass leaves a tempfile local named ces_<sector code> (ces_72, ces_60)
* holding year, month, employment_ces, base, ces_norm_<sector code> and date.
foreach spec in "72 accommodation_food_emp CEU7072000001" ///
	"60 business_professional_emp CEU6000000001" {

	local code : word 1 of `spec'
	local stem : word 2 of `spec'
	local series : word 3 of `spec'
	local ces_csv "${root}/data/dvc/St Louis Fed/`stem'_`series'.csv"

	* Register the input with -project- and read it
	project, uses("`ces_csv'")
	import delimited "`ces_csv'", clear

	* The imported date is a string: characters 1-4 are read as the year and
	* characters 6-7 as the month. The value column carries the lower-cased
	* series id as its name.
	generate year = real(substr(date, 1, 4))
	generate month = real(substr(date, 6, 2))
	rename `=lower("`series'")' employment_ces
	assert !mi(year, month, employment_ces)

	* Restrict to 2020-2022 and date every monthly value on the 15th
	keep year month employment_ces
	keep if inrange(year, 2020, 2022)
	generate date = mdy(month, 15, year)

	* Copy the January 2020 level onto every row to serve as the base
	generate jan2020_level = employment_ces if date == mdy(1, 15, 2020)
	gegen base = mean(jan2020_level)
	drop jan2020_level

	* Percent difference from the January 2020 level
	generate ces_norm_`code' = (employment_ces / base - 1) * 100

	* Replace each date by the Friday of its Sunday-to-Saturday week
	* (dow() is 0 for Sunday and 5 for Friday)
	generate friday = date - dow(date) + 5
	format friday %td
	drop date
	rename friday date

	tempfile ces_`code'
	save `ces_`code''
}

*-------------------------------------------------------------------------------
* Merge
*-------------------------------------------------------------------------------

* Start from the tracker sector series. -clear- is the documented option of
* -use- for discarding the data in memory.
use `employment_sector', clear

* Keep only dates present in the tracker file and in both CES sector files
merge 1:1 date using `ces_72', nogen keep(3)
merge 1:1 date using `ces_60', nogen keep(3)

* Restrict dates
keep if date <= ${finaldate}

* Get changes: difference from the previous remaining row. The first row has
* no predecessor (the difference is missing there) and is set to 0.

sort date
gen change_emp_s72 = emp_s72 - emp_s72[_n-1] if _n > 1
replace change_emp_s72 = 0 if _n == 1
gen change_emp_ss60 = emp_ss60 - emp_ss60[_n-1]
replace change_emp_ss60 = 0 if _n == 1
gen change_ces_norm_72 = ces_norm_72 - ces_norm_72[_n-1]
replace change_ces_norm_72 = 0 if _n == 1
gen change_ces_norm_60 = ces_norm_60 - ces_norm_60[_n-1]
replace change_ces_norm_60 = 0 if _n == 1

* Calculate RMSEs
* Only rows up to 15 Apr 2022 enter the RMSEs and everything that follows.
* `last_date' is reused below to position the plot annotations.
local last_date = mdy(4, 15, 2022)
keep if date <= `last_date'

* The gaps are taken after dividing both changes by 100 and the root mean
* square is multiplied by 100 again, so the RMSE is back on the p.p. scale of
* the change variables; it is rounded to two decimals.
gen ss72_dif = change_emp_s72 / 100 - change_ces_norm_72 / 100 if !mi(change_ces_norm_72)
gen ss72_dif_sq = ss72_dif * ss72_dif
su ss72_dif_sq
local ss72_mse = `r(mean)'
local ss72_rmse = round(100 * sqrt(`ss72_mse'), 0.01)
di "RMSE ss72 is " "`ss72_rmse'"

gen ss60_dif = change_emp_ss60 / 100 - change_ces_norm_60 / 100 if !mi(change_ces_norm_60)
gen ss60_dif_sq = ss60_dif * ss60_dif
su ss60_dif_sq
local ss60_mse = `r(mean)'
local ss60_rmse = round(100 * sqrt(`ss60_mse'), 0.01)
di "RMSE ss60 is " "`ss60_rmse'"

*-------------------------------------------------------------------------------
* Plot
*-------------------------------------------------------------------------------

* Position of the dashed vertical reference line. 17 Apr 2020 is the Friday of
* the week containing 15 Apr 2020 (mid-month dates were moved to Fridays when
* the CES series were prepared), which is why the label next to the line reads
* "April 15 2020".
local apr = td(17apr2020)

keep date change_emp_s72 change_ces_norm_72 change_emp_ss60 change_ces_norm_60

* Correlations between tracker and CES changes, per sector. Both are formatted
* straight to two decimals: they are shown and exported with two decimals, and
* rounding to three decimals first could tip the second decimal the wrong way.
corr change_emp_s72 change_ces_norm_72
local corr_afs: di %4.2f `r(rho)'

corr change_emp_ss60 change_ces_norm_60
local corr_prof: di %4.2f `r(rho)'

* Give the tracker variables the same numeric suffixes as the CES variables so
* that -reshape long- can stack the two sectors
rename change_emp_s72 change_emp_72
rename change_emp_ss60 change_emp_60

reshape long change_emp_ change_ces_norm_, i(date) j(sector)

* Turn the numeric sector code into the panel titles used by by()
tostring sector, replace

replace sector = "Food and Accommodation Services" if sector == "72"
replace sector = "Professional Services" if sector == "60"

* One panel per sector, tracker vs CES changes.
* ${title_`version'} injects title options from a global; the local `version'
* is not defined in this file (presumably set in an included file - confirm).
tw ///
	(connected change_emp_ date , sort color(oi2)) ///
	(connected change_ces_norm_ date  , sort color(oi3)), ///
	by(sector, note("") imargin(medium) ) aspectratio(0.7) ///
	xlab(`=mdy(1, 15, 2020)' `""Jan" "2020""'   ///
	`=mdy(5, 15, 2020)' "May" ///
	`=mdy(9, 15, 2020)' "Sep" ///
	`=mdy(1, 15, 2021)' `""Jan" "2021""' ///
	`=mdy(5, 15, 2021)' "May" ///
	`=mdy(9, 15, 2021)' "Sep"   ///
	, format(%tdm) labsize(small)) xtitle(" ") ${title_`version'} ///
	legend(order(1 "Tracker Employment Series" 2 "CES") size(small) col(2) ring(1) pos(6) symxsize(huge)) ///
	ylabel(-40 "-40 p.p." -20 "-20 p.p." 0 "0 p.p." 20 "+20 p.p.", nogrid) ///
	ytitle("Month-to-Month Change" "in Employment (p.p.)") ///
	xline(`apr', lpattern(dash) lcolor(gs6)) ///
	text(20 `=`apr'+15' "April 15 2020", place(3) color(gs6) size(small)) ///
	xsize(11)

* Add the RMSE / correlation annotation to each panel, 95 days before
* `last_date' (set where the RMSEs are computed). "1:" and "2:" presumably
* address the first and second by() panel - confirm against the -addplot- help.
* The first call carries the Food and Accommodation statistics.
addplot 1:, text(-40 `=`last_date'-95'  `"RMSE: `=string(`ss72_rmse', "%3.2f")' p.p."' `"Correlation: `=string(`corr_afs', "%3.2f")'"', color(gs8) placement(west) justification(left) size(medsmall)) norescaling ///
	xlab(`=mdy(1, 15, 2020)' `""Jan" "2020""'   ///
	`=mdy(5, 15, 2020)' "May" ///
	`=mdy(9, 15, 2020)' "Sep" ///
	`=mdy(1, 15, 2021)' `""Jan" "2021""' ///
	`=mdy(5, 15, 2021)' "May" ///
	`=mdy(9, 15, 2021)' "Sep"  ///
	, format(%tdm) labsize(small)) ///
	xtitle(" ") ${title_`version'} ///
	subtitle(, fcolor(gs15) lcolor(gs15)) ///
	legend(order(1 "Tracker Employment Series" 2 "CES") size(small) col(2) ring(1) pos(6) symxsize(huge))

* The second call carries the Professional Services statistics; it also
* relabels the y-axis without units and blanks the y-axis title.
addplot 2:, text(-40 `=`last_date'-95' `"RMSE: `=string(`ss60_rmse', "%3.2f")' p.p."' `"Correlation: `=string(`corr_prof', "%3.2f")'"', color(gs8) placement(west) justification(left) size(medsmall)) norescaling ///
	xlab(`=mdy(1, 15, 2020)' `""Jan" "2020""'   ///
	`=mdy(5, 15, 2020)' "May" ///
	`=mdy(9, 15, 2020)' "Sep" ///
	`=mdy(1, 15, 2021)' `""Jan" "2021""' ///
	`=mdy(5, 15, 2021)' "May" ///
	`=mdy(9, 15, 2021)' "Sep"  ///
	, format(%tdm) labsize(small)) ///
	ylabel(-40 "-40" -20 "-20" 0 "0" 20 "20", nogrid) ///
	xtitle(" ")  ytitle("") ${title_`version'} ///
	legend(order(1 "Tracker Employment Series" 2 "CES") size(small) col(2) ring(1) pos(6) symxsize(huge))


* Export through the project's graph-export wrapper in the configured format
oi_graph_export "${root}/results/Employment/Employment AFS and PS in Tracker vs CES - changes - long", type(${fig_type})

*-------------------------------------------------------------------------------
* Export output numbers to YAML file
*-------------------------------------------------------------------------------

* All four statistics compare the tracker series with CES: the Professional
* Services RMSE and correlation are computed from change_ces_norm_60, just as
* the Food and Accommodation ones are computed from change_ces_norm_72.
* NOTE(review): the two Professional Services keys still contain "cps". They
* are left unchanged because anything that reads this YAML file may look them
* up by name; only the human-readable comments are corrected here.

yamlout using "${root}/results/paper numbers/`category'/Changes in Employment Rates Over Time Accommodation and Food Services vs Professional Services.yaml", ///
	key("emp_ces_food_rmse") ///
	comment("RMSE Tracker Employment vs CES - Food and Accommodation Services (p.p.)") ///
	value(`ss72_rmse') fmt(%9.2f)

* A correlation has no unit, so its comment carries no "(p.p.)"
yamlout using "${root}/results/paper numbers/`category'/Changes in Employment Rates Over Time Accommodation and Food Services vs Professional Services.yaml", ///
	key("emp_ces_food_corr") ///
	comment("Correlation Tracker Employment vs CES - Food and Accommodation Services") ///
	value(`corr_afs') fmt(%9.2f)

yamlout using "${root}/results/paper numbers/`category'/Changes in Employment Rates Over Time Accommodation and Food Services vs Professional Services.yaml", ///
	key("emp_cps_prof_rmse") ///
	comment("RMSE Tracker Employment vs CES - Professional Services (p.p.)") ///
	value(`ss60_rmse') fmt(%9.2f)

yamlout using "${root}/results/paper numbers/`category'/Changes in Employment Rates Over Time Accommodation and Food Services vs Professional Services.yaml", ///
	key("emp_cps_prof_corr") ///
	comment("Correlation Tracker Employment vs CES - Professional Services") ///
	value(`corr_prof') fmt(%9.2f)

* Register the YAML file as an output of this do-file
project, creates("${root}/results/paper numbers/`category'/Changes in Employment Rates Over Time Accommodation and Food Services vs Professional Services.yaml")
